import pandas as pd
import numpy as np
C:\Users\sumed\AppData\Roaming\Python\Python311\site-packages\pandas\core\arrays\masked.py:60: UserWarning: Pandas requires version '1.3.6' or newer of 'bottleneck' (version '1.3.5' currently installed). from pandas.core import (
zom = pd.read_csv(r"C:\Users\sumed\OneDrive\Desktop\zomato\zomato.csv")
zom
| url | address | name | online_order | book_table | rate | votes | phone | location | rest_type | dish_liked | cuisines | approx_cost(for two people) | reviews_list | menu_item | listed_in(type) | listed_in(city) | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | https://www.zomato.com/bangalore/jalsa-banasha... | 942, 21st Main Road, 2nd Stage, Banashankari, ... | Jalsa | Yes | Yes | 4.1/5 | 775 | 080 42297555\r\n+91 9743772233 | Banashankari | Casual Dining | Pasta, Lunch Buffet, Masala Papad, Paneer Laja... | North Indian, Mughlai, Chinese | 800 | [('Rated 4.0', 'RATED\n A beautiful place to ... | [] | Buffet | Banashankari |
| 1 | https://www.zomato.com/bangalore/spice-elephan... | 2nd Floor, 80 Feet Road, Near Big Bazaar, 6th ... | Spice Elephant | Yes | No | 4.1/5 | 787 | 080 41714161 | Banashankari | Casual Dining | Momos, Lunch Buffet, Chocolate Nirvana, Thai G... | Chinese, North Indian, Thai | 800 | [('Rated 4.0', 'RATED\n Had been here for din... | [] | Buffet | Banashankari |
| 2 | https://www.zomato.com/SanchurroBangalore?cont... | 1112, Next to KIMS Medical College, 17th Cross... | San Churro Cafe | Yes | No | 3.8/5 | 918 | +91 9663487993 | Banashankari | Cafe, Casual Dining | Churros, Cannelloni, Minestrone Soup, Hot Choc... | Cafe, Mexican, Italian | 800 | [('Rated 3.0', "RATED\n Ambience is not that ... | [] | Buffet | Banashankari |
| 3 | https://www.zomato.com/bangalore/addhuri-udupi... | 1st Floor, Annakuteera, 3rd Stage, Banashankar... | Addhuri Udupi Bhojana | No | No | 3.7/5 | 88 | +91 9620009302 | Banashankari | Quick Bites | Masala Dosa | South Indian, North Indian | 300 | [('Rated 4.0', "RATED\n Great food and proper... | [] | Buffet | Banashankari |
| 4 | https://www.zomato.com/bangalore/grand-village... | 10, 3rd Floor, Lakshmi Associates, Gandhi Baza... | Grand Village | No | No | 3.8/5 | 166 | +91 8026612447\r\n+91 9901210005 | Basavanagudi | Casual Dining | Panipuri, Gol Gappe | North Indian, Rajasthani | 600 | [('Rated 4.0', 'RATED\n Very good restaurant ... | [] | Buffet | Banashankari |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 51712 | https://www.zomato.com/bangalore/best-brews-fo... | Four Points by Sheraton Bengaluru, 43/3, White... | Best Brews - Four Points by Sheraton Bengaluru... | No | No | 3.6 /5 | 27 | 080 40301477 | Whitefield | Bar | NaN | Continental | 1,500 | [('Rated 5.0', "RATED\n Food and service are ... | [] | Pubs and bars | Whitefield |
| 51713 | https://www.zomato.com/bangalore/vinod-bar-and... | Number 10, Garudachar Palya, Mahadevapura, Whi... | Vinod Bar And Restaurant | No | No | NaN | 0 | +91 8197675843 | Whitefield | Bar | NaN | Finger Food | 600 | [] | [] | Pubs and bars | Whitefield |
| 51714 | https://www.zomato.com/bangalore/plunge-sherat... | Sheraton Grand Bengaluru Whitefield Hotel & Co... | Plunge - Sheraton Grand Bengaluru Whitefield H... | No | No | NaN | 0 | NaN | Whitefield | Bar | NaN | Finger Food | 2,000 | [] | [] | Pubs and bars | Whitefield |
| 51715 | https://www.zomato.com/bangalore/chime-sherato... | Sheraton Grand Bengaluru Whitefield Hotel & Co... | Chime - Sheraton Grand Bengaluru Whitefield Ho... | No | Yes | 4.3 /5 | 236 | 080 49652769 | ITPL Main Road, Whitefield | Bar | Cocktails, Pizza, Buttermilk | Finger Food | 2,500 | [('Rated 4.0', 'RATED\n Nice and friendly pla... | [] | Pubs and bars | Whitefield |
| 51716 | https://www.zomato.com/bangalore/the-nest-the-... | ITPL Main Road, KIADB Export Promotion Industr... | The Nest - The Den Bengaluru | No | No | 3.4 /5 | 13 | +91 8071117272 | ITPL Main Road, Whitefield | Bar, Casual Dining | NaN | Finger Food, North Indian, Continental | 1,500 | [('Rated 5.0', 'RATED\n Great ambience , look... | [] | Pubs and bars | Whitefield |
51717 rows × 17 columns
zom.shape
(51717, 17)
zom.columns
Index(['url', 'address', 'name', 'online_order', 'book_table', 'rate', 'votes',
'phone', 'location', 'rest_type', 'dish_liked', 'cuisines',
'approx_cost(for two people)', 'reviews_list', 'menu_item',
'listed_in(type)', 'listed_in(city)'],
dtype='object')
zom.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 51717 entries, 0 to 51716 Data columns (total 17 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 url 51717 non-null object 1 address 51717 non-null object 2 name 51717 non-null object 3 online_order 51717 non-null object 4 book_table 51717 non-null object 5 rate 43942 non-null object 6 votes 51717 non-null int64 7 phone 50509 non-null object 8 location 51696 non-null object 9 rest_type 51490 non-null object 10 dish_liked 23639 non-null object 11 cuisines 51672 non-null object 12 approx_cost(for two people) 51371 non-null object 13 reviews_list 51717 non-null object 14 menu_item 51717 non-null object 15 listed_in(type) 51717 non-null object 16 listed_in(city) 51717 non-null object dtypes: int64(1), object(16) memory usage: 6.7+ MB
zom.isnull().sum()
url 0 address 0 name 0 online_order 0 book_table 0 rate 7775 votes 0 phone 1208 location 21 rest_type 227 dish_liked 28078 cuisines 45 approx_cost(for two people) 346 reviews_list 0 menu_item 0 listed_in(type) 0 listed_in(city) 0 dtype: int64
zom["votes"].mean()
283.69752692538236
# Keep only the decimal number from strings such as "4.1/5" or "3.6 /5";
# entries that contain no decimal number become NaN.
# The pattern is a raw string because '\d' in a normal literal is an invalid
# escape sequence, and expand=False makes extract() return a Series rather
# than a one-column DataFrame.
zom['rate'] = zom['rate'].str.extract(r'(\d+\.\d+)', expand=False)
zom['rate']
0 4.1
1 4.1
2 3.8
3 3.7
4 3.8
...
51712 3.6
51713 NaN
51714 NaN
51715 4.3
51716 3.4
Name: rate, Length: 51717, dtype: object
zom['rate'] = zom['rate'].astype(float)
zom['rate']
0 4.1
1 4.1
2 3.8
3 3.7
4 3.8
...
51712 3.6
51713 NaN
51714 NaN
51715 4.3
51716 3.4
Name: rate, Length: 51717, dtype: float64
zom["rate"].mean()
3.700448817952718
# Fill missing ratings with the column mean, rounded to one decimal place.
# Computing it here instead of hard-coding 3.7 keeps the fill value correct
# if the input data changes.
zom['rate'] = zom['rate'].fillna(round(zom['rate'].mean(), 1))
zom['rate'].isnull().sum()
0
zom = zom.dropna(subset=['phone'])
zom.shape
(50509, 17)
zom = zom.dropna(subset=['location'])
zom.shape
(50509, 17)
zom['phone'].isnull().sum()
0
zom['location'].isnull().sum()
0
zom.shape
(50509, 17)
zom.isnull().sum()
url 0 address 0 name 0 online_order 0 book_table 0 rate 0 votes 0 phone 0 location 0 rest_type 202 dish_liked 27209 cuisines 16 approx_cost(for two people) 12 reviews_list 0 menu_item 0 listed_in(type) 0 listed_in(city) 0 dtype: int64
zom['approx_cost(for two people)'] = zom['approx_cost(for two people)'].str.replace(',', '')
zom['approx_cost(for two people)']
0 800
1 800
2 800
3 300
4 600
...
51711 800
51712 1500
51713 600
51715 2500
51716 1500
Name: approx_cost(for two people), Length: 50509, dtype: object
zom['approx_cost(for two people)'] = zom['approx_cost(for two people)'].astype(float)
zom['approx_cost(for two people)'].mean()
557.5959958017309
# Fill missing costs with the column mean, rounded to one decimal place.
# Computing it here instead of hard-coding 557.6 keeps the fill value correct
# if the input data changes.
zom['approx_cost(for two people)'] = zom['approx_cost(for two people)'].fillna(
    round(zom['approx_cost(for two people)'].mean(), 1)
)
zom.isnull().sum()
url 0 address 0 name 0 online_order 0 book_table 0 rate 0 votes 0 phone 0 location 0 rest_type 202 dish_liked 27209 cuisines 16 approx_cost(for two people) 0 reviews_list 0 menu_item 0 listed_in(type) 0 listed_in(city) 0 dtype: int64
zom = zom.dropna(subset=['rest_type'])
zom.shape
(50307, 17)
zom = zom.dropna(subset=['cuisines'])
zom.shape
(50291, 17)
del zom['dish_liked']
zom.columns
Index(['url', 'address', 'name', 'online_order', 'book_table', 'rate', 'votes',
'phone', 'location', 'rest_type', 'cuisines',
'approx_cost(for two people)', 'reviews_list', 'menu_item',
'listed_in(type)', 'listed_in(city)'],
dtype='object')
del zom['url']
zom.columns
Index(['address', 'name', 'online_order', 'book_table', 'rate', 'votes',
'phone', 'location', 'rest_type', 'cuisines',
'approx_cost(for two people)', 'reviews_list', 'menu_item',
'listed_in(type)', 'listed_in(city)'],
dtype='object')
del zom['address']
zom.columns
Index(['name', 'online_order', 'book_table', 'rate', 'votes', 'phone',
'location', 'rest_type', 'cuisines', 'approx_cost(for two people)',
'reviews_list', 'menu_item', 'listed_in(type)', 'listed_in(city)'],
dtype='object')
zom.isnull().sum()
name 0 online_order 0 book_table 0 rate 0 votes 0 phone 0 location 0 rest_type 0 cuisines 0 approx_cost(for two people) 0 reviews_list 0 menu_item 0 listed_in(type) 0 listed_in(city) 0 dtype: int64
zom.head()
| name | online_order | book_table | rate | votes | phone | location | rest_type | cuisines | approx_cost(for two people) | reviews_list | menu_item | listed_in(type) | listed_in(city) | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Jalsa | Yes | Yes | 4.1 | 775 | 080 42297555\r\n+91 9743772233 | Banashankari | Casual Dining | North Indian, Mughlai, Chinese | 800.0 | [('Rated 4.0', 'RATED\n A beautiful place to ... | [] | Buffet | Banashankari |
| 1 | Spice Elephant | Yes | No | 4.1 | 787 | 080 41714161 | Banashankari | Casual Dining | Chinese, North Indian, Thai | 800.0 | [('Rated 4.0', 'RATED\n Had been here for din... | [] | Buffet | Banashankari |
| 2 | San Churro Cafe | Yes | No | 3.8 | 918 | +91 9663487993 | Banashankari | Cafe, Casual Dining | Cafe, Mexican, Italian | 800.0 | [('Rated 3.0', "RATED\n Ambience is not that ... | [] | Buffet | Banashankari |
| 3 | Addhuri Udupi Bhojana | No | No | 3.7 | 88 | +91 9620009302 | Banashankari | Quick Bites | South Indian, North Indian | 300.0 | [('Rated 4.0', "RATED\n Great food and proper... | [] | Buffet | Banashankari |
| 4 | Grand Village | No | No | 3.8 | 166 | +91 8026612447\r\n+91 9901210005 | Basavanagudi | Casual Dining | North Indian, Rajasthani | 600.0 | [('Rated 4.0', 'RATED\n Very good restaurant ... | [] | Buffet | Banashankari |
Summary Statistics:
summary_stats = zom.describe()
print(summary_stats)
rate votes approx_cost(for two people) count 50291.000000 50291.000000 50291.000000 mean 3.702203 287.093496 558.204573 std 0.395443 811.693496 440.890479 min 1.800000 0.000000 40.000000 25% 3.500000 7.000000 300.000000 50% 3.700000 42.000000 400.000000 75% 3.900000 201.000000 700.000000 max 4.900000 16832.000000 6000.000000
Average Rating:
average_rating = zom['rate'].mean()
print("Average Rating:", average_rating)
Average Rating: 3.7022031775069104
Most Voted Restaurant:
# Look up the name on the row holding the highest vote count, using a single
# label-based .loc access (row label, column label).
top_votes_index = zom['votes'].idxmax()
most_voted_restaurant = zom.loc[top_votes_index, 'name']
print("Most Voted Restaurant:", most_voted_restaurant)
Most Voted Restaurant: Byg Brewski Brewing Company
Number of Restaurants with Online Ordering & Table Booking:
online_order_counts = zom['online_order'].value_counts()
print("Online Order Counts:\n\n", online_order_counts)
table_booking_counts = zom['book_table'].value_counts()
print("Table Booking Counts:\n\n", table_booking_counts)
Online Order Counts: online_order Yes 30028 No 20263 Name: count, dtype: int64 Table Booking Counts: book_table No 43862 Yes 6429 Name: count, dtype: int64
Average Cost for Two People:
average_cost_for_two = zom['approx_cost(for two people)'].mean()
print("Average Cost for Two People:", average_cost_for_two)
Average Cost for Two People: 558.2045733829113
Locations with the Most Restaurants:
top_locations = zom['location'].value_counts().head(5)
print("Top 5 Locations with the Most Restaurants:\n\n", top_locations)
Top 5 Locations with the Most Restaurants: location BTM 4981 HSR 2479 Koramangala 5th Block 2446 JP Nagar 2200 Whitefield 2079 Name: count, dtype: int64
Restaurant Types Distribution:
restaurant_type_distribution = zom['rest_type'].value_counts()
print("Restaurant Type Distribution:\n", restaurant_type_distribution)
Restaurant Type Distribution:
rest_type
Quick Bites 18636
Casual Dining 10234
Cafe 3618
Delivery 2514
Dessert Parlor 2202
...
Dessert Parlor, Food Court 2
Cafe, Food Court 2
Quick Bites, Kiosk 1
Sweet Shop, Dessert Parlor 1
Bakery, Food Court 1
Name: count, Length: 91, dtype: int64
Top Restaurants in a Specific Location
import ipywidgets as widgets
from IPython.display import display
# Offer every distinct location, plus a leading empty entry that stands for
# "nothing selected yet" and is the initial value.
areas = zom['location'].unique()
area_options = ['', *areas]
area_dropdown = widgets.Dropdown(
    description='Select Area:',
    options=area_options,
    value='',
)
display(area_dropdown)
Dropdown(description='Select Area:', options=('', 'Banashankari', 'Basavanagudi', 'Mysore Road', 'Jayanagar', …
def top_restaurants_in_location(data, location, top_n=5):
    """Return the highest-rated restaurants in one location.

    The location match is case-insensitive and ignores leading/trailing
    whitespace. ``data`` is left untouched: the normalised location values
    are computed in a temporary Series instead of being written back to the
    caller's DataFrame.

    Args:
        data: DataFrame with at least the columns 'location', 'name',
            'approx_cost(for two people)' and 'rate'.
        location: Location name to filter on.
        top_n: Maximum number of rows to return.

    Returns:
        DataFrame with the columns 'name', 'approx_cost(for two people)'
        and 'rate', sorted by 'rate' in descending order. It is empty when
        no row matches ``location``.
    """
    wanted = location.lower().strip()
    normalized_locations = data['location'].str.lower().str.strip()
    in_location = data[normalized_locations == wanted]
    top_rated = in_location.sort_values(by='rate', ascending=False).head(top_n)
    return top_rated[['name', 'approx_cost(for two people)', 'rate']]
# Read the dropdown's current value; the empty string means no area was picked.
selected_area = area_dropdown.value
if not selected_area:
    print("Please select an area from the dropdown.")
else:
    top_restaurants_dataframe = top_restaurants_in_location(zom, selected_area)
    print(f"Top restaurants in '{selected_area}':\n")
    print(top_restaurants_dataframe)
Please select an area from the dropdown.
import seaborn as sns
import matplotlib.pyplot as plt
# Bar chart of how many restaurants do / do not accept online orders.
online_order_ax = sns.countplot(data=zom, x='online_order')
online_order_ax.set_title('Restaurants delivering online or Not')
online_order_ax.set_xlabel('Online Order Availability')
online_order_ax.set_ylabel('Count')
plt.show()
import matplotlib.pyplot as plt
# Bar chart of the ten most frequent restaurant types.
plt.figure(figsize=(8, 6))
top_rest_types = zom['rest_type'].value_counts().nlargest(10)
top_rest_types.plot(kind='bar', color='skyblue')
plt.title('Top 10 Restaurant Types')
plt.xlabel('Restaurant Type')
plt.ylabel('Count')
# Tilt the tick labels so the longer type names do not overlap.
plt.xticks(rotation=45)
plt.show()
import pandas as pd
import matplotlib.pyplot as plt
# Count restaurants for every (rating, city) pair: one row per rating value,
# one column per city.
rel_type = pd.crosstab(zom['rate'], zom['listed_in(city)'])

# Stacked bar plot: one bar per rating, segmented by city. The bar height is
# a restaurant count, so the y-axis is labelled as a count rather than 'City'.
rel_type.plot(kind='bar', stacked=True, figsize=(20, 16), colormap='viridis')
plt.title('City - Rating', fontsize=18)
plt.ylabel('Number of Restaurants', fontsize=12)
plt.xlabel('Rating', fontsize=12)
plt.xticks(fontsize=12)
plt.yticks(fontsize=12)
plt.show()
import seaborn as sns
import matplotlib.pyplot as plt
# Bar chart of the number of restaurants in each 'listed_in(type)' category.
plt.figure(figsize=(10, 10))
listed_type_ax = sns.countplot(data=zom, x='listed_in(type)')
listed_type_ax.set_title('Restaurants in Listed Types')
listed_type_ax.set_xlabel('Listed Type')
listed_type_ax.set_ylabel('Count')
plt.show()
Heatmap overlay on a map of Bangalore indicating the concentration of restaurants in a particular area.
import pandas as pd
# Getting unique locations
locations = pd.DataFrame({"Name": zom['location'].unique()})
locations.head()
| Name | |
|---|---|
| 0 | Banashankari |
| 1 | Basavanagudi |
| 2 | Mysore Road |
| 3 | Jayanagar |
| 4 | Kumaraswamy Layout |
from geopy.extra.rate_limiter import RateLimiter
from geopy.geocoders import Nominatim
import pandas as pd
import numpy as np

# Add 'Bangaluru' to each location name so the geocoder query carries the
# city as well as the area (presumably to disambiguate common area names).
locations['Name'] = locations['Name'].apply(lambda x: "Bangaluru " + str(x))

# Getting latitude and longitude using GeoPy.
geolocator = Nominatim(user_agent="app")
# The public Nominatim service expects clients to stay at or below one request
# per second. RateLimiter spaces the calls out, retries transient service
# errors, and with its default settings returns None when a lookup still
# fails, so one bad request no longer aborts the whole loop.
geocode = RateLimiter(geolocator.geocode, min_delay_seconds=1)

lat_lon = []
for location in locations['Name']:
    location_data = geocode(location)
    if location_data is None:
        # No result: keep a NaN placeholder so lat_lon stays aligned with
        # the rows of `locations`.
        lat_lon.append(np.nan)
    else:
        lat_lon.append((location_data.latitude, location_data.longitude))

locations['geo_loc'] = lat_lon
locations.to_csv('locations.csv', index=False)
# Strip the city prefix again so 'Name' matches the dataset's location values.
locations["Name"] = locations['Name'].apply(lambda x: x.replace("Bangaluru", "").strip())
locations.head()
| Name | geo_loc | |
|---|---|---|
| 0 | Banashankari | (12.8683575, 77.51705850680827) |
| 1 | Basavanagudi | (12.9405198, 77.5794387) |
| 2 | Mysore Road | (12.3668428, 76.6620074) |
| 3 | Jayanagar | (12.9282849, 77.5868566) |
| 4 | Kumaraswamy Layout | (12.9081487, 77.5553179) |
import folium
# Defining a base map function
def generateBaseMap(default_location=(12.97, 77.59), default_zoom_start=12):
    """Create an empty folium map to draw overlays on.

    Args:
        default_location: (latitude, longitude) pair the map is centred on.
            The default is an immutable tuple rather than a list, so it
            cannot be mutated and shared between calls.
        default_zoom_start: Initial zoom level passed to folium.

    Returns:
        A ``folium.Map`` with the scale control enabled.
    """
    return folium.Map(
        location=list(default_location),
        control_scale=True,
        zoom_start=default_zoom_start,
    )
# Getting the restaurant count with their name and locations
# One row per location with its restaurant count, then attach the geocoded
# coordinates; locations without coordinates are dropped.
location_counts = zom['location'].value_counts().reset_index()
location_counts.columns = ['Name', 'count']
Rest_locations = pd.merge(location_counts, locations, on='Name', how="left").dropna()
Rest_locations
| Name | count | geo_loc | |
|---|---|---|---|
| 0 | BTM | 4981 | (12.911275849999999, 77.60456543431182) |
| 1 | HSR | 2479 | (12.9136474, 77.6377482) |
| 2 | Koramangala 5th Block | 2446 | (12.9348429, 77.6189768) |
| 3 | JP Nagar | 2200 | (12.9120761, 77.57939318031076) |
| 4 | Whitefield | 2079 | (13.01072835, 77.76149827911348) |
| ... | ... | ... | ... |
| 88 | West Bangalore | 6 | (13.022234699999998, 77.56718324768107) |
| 89 | Yelahanka | 5 | (13.133592700000001, 77.59708590318306) |
| 90 | Jakkur | 3 | (13.0784743, 77.6068938) |
| 91 | Rajarajeshwari Nagar | 2 | (12.9363054, 77.5180883) |
| 92 | Peenya | 1 | (13.0324174, 77.5338863) |
89 rows × 3 columns
import folium
from folium.plugins import HeatMap
import numpy as np
# Split the (lat, lon) tuples in 'geo_loc' into two separate columns.
lat, lon = zip(*Rest_locations['geo_loc'])
Rest_locations['lat'] = lat
Rest_locations['lon'] = lon

# Each heat point is [lat, lon, weight], with the restaurant count as weight.
heat_points = Rest_locations[['lat', 'lon', 'count']].values.tolist()
basemap = generateBaseMap()
HeatMap(heat_points, radius=15).add_to(basemap)
<folium.plugins.heat_map.HeatMap at 0x15d6b3e2750>
basemap
Scatter Plot for Cost vs. Rating:
# Scatter of approximate cost for two (x) against rating (y), one point per row.
plt.figure(figsize=(20, 6))
scatter_ax = sns.scatterplot(data=zom, x='approx_cost(for two people)', y='rate')
scatter_ax.set_title('Scatter Plot of Cost vs. Rating')
scatter_ax.set_xlabel('Cost for Two People')
scatter_ax.set_ylabel('Rating')
plt.show()
import plotly.graph_objects as go
# Make sure the cost column is numeric: strip everything except digits and the
# decimal point from any string values (e.g. "1,500"), then cast to float.
# The pattern is a raw string because '\d' in a normal literal is an invalid
# escape sequence.
zom['approx_cost(for two people)'] = (
    zom['approx_cost(for two people)']
    .replace(r'[^\d.]', '', regex=True)
    .astype(float)
)

# Mean cost for two per location.
avg_cost_data = zom.groupby('location')['approx_cost(for two people)'].mean().reset_index()

fig = go.Figure(go.Treemap(
    labels=avg_cost_data['location'],
    # Every location sits directly under the (empty) root.
    parents=[""] * len(avg_cost_data),
    values=avg_cost_data['approx_cost(for two people)'],
    texttemplate="%{label}<br>Avg Cost: %{value}",
    hoverinfo="label+value+text",
    marker=dict(
        # colorscale / cmin / cmax only take effect when `colors` is a numeric
        # array, so the average cost is passed here to drive the tile colours
        # and make the colour bar meaningful.
        colors=avg_cost_data['approx_cost(for two people)'],
        colorscale="Viridis",
        cmin=0,
        cmax=avg_cost_data['approx_cost(for two people)'].max(),
        colorbar=dict(title="Avg Cost for Two People")
    )
))
fig.update_layout(
    title='Location-wise Average Cost for Two People (Tree Map)',
    margin=dict(l=0, r=0, b=0, t=40),
)
fig.show()
from wordcloud import WordCloud
import matplotlib.pyplot as plt
# Concatenate all non-missing 'cuisines' values into one space-separated text.
cuisines_text = ' '.join(zom['cuisines'].dropna())

# Build the word cloud from that text.
cloud_builder = WordCloud(width=800, height=400, background_color='white')
wordcloud = cloud_builder.generate(cuisines_text)

# Render the cloud as an image without axes.
plt.figure(figsize=(10, 5))
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis('off')
plt.title('Most Liked Food by Bangaloreans (Word Cloud)')
plt.show()
import matplotlib.pyplot as plt
import seaborn as sns
# Number of rows per restaurant name; the N most frequent names are treated
# as the largest chains.
N = 10
restaurant_counts = zom['name'].value_counts()
largest_chains = restaurant_counts.head(N)

sns.set(style="whitegrid")
plt.figure(figsize=(12, 8))
colors = sns.color_palette("pastel", N)

# Sort ascending once so the largest chain ends up at the top of the
# horizontal bar chart; the same order is reused for the labels below.
chains_ascending = largest_chains.sort_values()
bars = chains_ascending.plot(kind='barh', color=colors)

# Write each outlet count next to the end of its bar.
for bar, count in zip(bars.patches, chains_ascending):
    label_x = bar.get_width() + 0.1
    label_y = bar.get_y() + bar.get_height()/2 - 0.2
    plt.text(label_x, label_y, str(count),
             ha='center', va='center', fontsize=10, color='black')

plt.title('Top {} Largest Restaurant Chains in Bangalore'.format(N), fontsize=16)
plt.xlabel('Number of Outlets', fontsize=12)
plt.ylabel('Restaurant Chain', fontsize=12)
plt.xticks(fontsize=10)
plt.yticks(fontsize=10)
plt.tight_layout()
plt.show()
# Pairwise correlation between the numeric columns, drawn as an annotated heatmap.
numeric_columns = zom.select_dtypes(include=['number'])
corr = numeric_columns.corr()

plt.figure(figsize=(15, 8))
heatmap_options = dict(annot=True, cmap='coolwarm', fmt=".2f", linewidths=.5)
sns.heatmap(corr, **heatmap_options)
plt.xticks(rotation=45)
plt.yticks(rotation=0)
plt.show()
import folium
from folium.plugins import HeatMap
import numpy as np
# Uses Rest_locations and generateBaseMap.
# Split the (lat, lon) tuples in 'geo_loc' into two separate columns.
lat, lon = zip(*Rest_locations['geo_loc'])
Rest_locations['lat'] = lat
Rest_locations['lon'] = lon

# Each heat point is [lat, lon, weight], with the restaurant count as weight.
heat_points = Rest_locations[['lat', 'lon', 'count']].values.tolist()
basemap = generateBaseMap()
HeatMap(heat_points, radius=15).add_to(basemap)

# Save the Folium map as an HTML file
basemap.save("heatmap_map.html")